# Run the project configuration script (presumably attaches data.table,
# ggplot2 and magrittr, which the code below relies on -- confirm in
# src/config.R).
source("src/config.R")

# Integrated per-patient omics table; the path comes from the snakemake
# input slot. For an interactive session the file can be read directly:
# rp <- readRDS("/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/patient_omics.RDS") %>% as.data.table()
rp <- as.data.table(readRDS(snakemake@input$patient_omics))

# Keep only rows that carry an RNA measurement: drop rows flagged "no RNA"
# and rows without a normalised count.
rp <- rp[gene_detected != "no RNA" & !is.na(normcounts)]

# Density of log10 normalised counts, one filled curve per gene_detected level.
ggplot(rp, aes(x = log10(normcounts), fill = gene_detected)) +
  geom_density(alpha = 0.4) +
  theme_bw() +
  ggtitle("RNA counts for non-detected proteins")

# The table is only needed for the plot above.
rm(rp)
# Build a gene symbol -> gene type lookup from the gene annotation table.
# All gene types are kept; nothing is filtered to protein-coding here.
# genecode_v29 <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/datasets/gene_annotation_v29.tsv')
genecode_v29 <- fread(snakemake@input$gencode_annotation)

# Upper-cased gene symbols are the join key used by every later merge.
genecode_v29[, geneID := toupper(gene_name)]
genecode_v29 <- unique(genecode_v29[, .(geneID, gene_type)])

# Sort by type so the collapsed string lists the types in a stable order.
genecode_v29 <- genecode_v29[order(gene_type)]

# One row per gene symbol. `collapse` is required: without it paste() returns
# one element per input row, so aggregate() produces a list column (rendered
# as 'c("a", "b")' once coerced to character) or, when every gene has the
# same number of types, a matrix column that cannot be assigned back.
# With `collapse` each gene gets a single string such as "typeA, typeB".
genecode <- aggregate(
  genecode_v29[, .(gene_type)],
  by = list(genecode_v29$geneID),
  FUN = paste,
  collapse = ", "
)
setnames(genecode, c("geneID", "gencode_v29"))
# aggregate() already returns exactly one row per geneID, so no further
# de-duplication is needed.
rm(genecode_v29)
# Load the disease gene table and attach the disease annotation to every
# annotated gene.
# dis_genes <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/datasets/disease_genes.tsv')
dis_genes <- fread(snakemake@input$disease_genes)
dis_genes <- unique(dis_genes[, .(geneID, DISEASE)])

# Sort so the collapsed string lists diseases in a stable order.
dis_genes <- dis_genes[order(DISEASE)]

# One row per gene. `collapse` is required: without it paste() returns one
# element per input row, so aggregate() produces a list column (rendered as
# 'c("a", "b")' once coerced to character) or, when every gene has the same
# number of diseases, a matrix column that cannot be assigned back.
dg <- aggregate(
  dis_genes[, .(DISEASE)],
  by = list(dis_genes$geneID),
  FUN = paste,
  collapse = ", "
)
setnames(dg, c("geneID", "disease"))
# aggregate() already returns exactly one row per geneID, so no further
# de-duplication is needed.
rm(dis_genes)

# Left join: keep every annotated gene; genes without a disease entry get NA.
allgenes <- merge(genecode, dg, by = "geneID", all.x = TRUE)
rm(genecode, dg)
# Load the list of genes detected by RNA-seq and label each gene with its
# detection level.
# detected_transcripts <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_transcripts.tsv')
detected_transcripts <- fread(snakemake@input$detected_transcripts)

# Later assignments overwrite earlier ones, so the precedence is
# all > half > once. TRUE is spelled out because `T` is an ordinary variable
# and, inside `[.data.table`, would resolve to a column named `T` if the
# input ever had one.
detected_transcripts[once == TRUE, fib_RNA := "once"]
detected_transcripts[half == TRUE, fib_RNA := "half of the samples"]
detected_transcripts[all == TRUE, fib_RNA := "all of the samples"]
detected_transcripts <- unique(detected_transcripts[, .(geneID, fib_RNA)])

# Left join onto the gene table; genes missing from the RNA-seq list, or
# listed without any flag set, are reported as "not detected".
allgenes <- as.data.table(
  merge(allgenes, detected_transcripts, by = "geneID", all.x = TRUE)
)
allgenes[is.na(fib_RNA), fib_RNA := "not detected"]
# Load the list of genes detected by proteomics and label each gene with its
# detection level.
# detected_proteins <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_proteins.tsv')
detected_proteins <- fread(snakemake@input$detected_proteins)

# Later assignments overwrite earlier ones, so the precedence is
# all > half > once. TRUE is spelled out because `T` is an ordinary variable
# and, inside `[.data.table`, would resolve to a column named `T` if the
# input ever had one.
detected_proteins[once == TRUE, fib_protein := "once"]
detected_proteins[half == TRUE, fib_protein := "half of the samples"]
detected_proteins[all == TRUE, fib_protein := "all of the samples"]
detected_proteins <- unique(detected_proteins[, .(geneID, fib_protein)])

# Left join onto the gene table; genes missing from the proteomics list, or
# listed without any flag set, are reported as "not detected".
allgenes <- as.data.table(
  merge(allgenes, detected_proteins, by = "geneID", all.x = TRUE)
)
allgenes[is.na(fib_protein), fib_protein := "not detected"]
# Load the list of genes detected by GTEx proteomics and summarise, per gene,
# the detection level in each tissue.
# detected_proteins_gtex <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_proteins_gtex.tsv')
detected_proteins_gtex <- fread(snakemake@input$detected_proteins_gtex)

# Later assignments overwrite earlier ones, so the precedence is
# all > half > once. TRUE is spelled out because `T` is an ordinary variable
# and, inside `[.data.table`, would resolve to a column named `T` if the
# input ever had one.
detected_proteins_gtex[once == TRUE, protein := "once"]
detected_proteins_gtex[half == TRUE, protein := "half of the samples"]
detected_proteins_gtex[all == TRUE, protein := "all of the samples"]

# Drop rows without any detection flag, then build one
# "<level> in <tissue>" label per gene/tissue row.
detected_proteins_gtex <- unique(
  detected_proteins_gtex[
    !is.na(protein),
    .(geneID, gtex_protein = paste(protein, "in", TISSUE))
  ]
)

# One row per gene. `collapse` is required: without it paste() returns one
# element per input row, so aggregate() produces a list column (rendered as
# 'c("a", "b")' once coerced to character) or, when every gene has the same
# number of tissues, a matrix column that cannot be assigned back.
dp_gtex <- aggregate(
  detected_proteins_gtex[, .(gtex_protein)],
  by = list(detected_proteins_gtex$geneID),
  FUN = paste,
  collapse = ", "
)
setnames(dp_gtex, c("geneID", "gtex_protein"))
# aggregate() already returns exactly one row per geneID, so no further
# de-duplication is needed.
rm(detected_proteins_gtex)

# Left join onto the gene table; genes without any GTEx detection are
# reported as "not detected".
allgenes <- as.data.table(
  merge(allgenes, dp_gtex, by = "geneID", all.x = TRUE)
)
allgenes[is.na(gtex_protein), gtex_protein := "not detected"]
# Keep a gene when at least one of the three assays detected it; genes that
# are "not detected" everywhere are dropped.
allgenes <- allgenes[
  fib_RNA != "not detected" |
    fib_protein != "not detected" |
    gtex_protein != "not detected"
]

# Sort by disease annotation; genes without one (NA) end up last.
allgenes <- allgenes[order(disease, na.last = TRUE)]
IyctLS0KIycgdGl0bGU6IEdlbmUgZGV0ZWN0aW9uIGJ5IE9NSUNzCiMnIGF1dGhvcjogc21pcm5vdmQKIycgd2I6CiMnICBpbnB1dDoKIycgIC0gcGF0aWVudF9vbWljczogJ2BzbSBjb25maWdbIlBST0NfREFUQSJdICsgIi9pbnRlZ3JhdGlvbi9wYXRpZW50X29taWNzLlJEUyJgJwojJyAgLSBnZW5jb2RlX2Fubm90YXRpb246ICdgc20gY29uZmlnWyJEQVRBU0VUUyJdICsgIi9nZW5lX2Fubm90YXRpb25fdjI5LnRzdiJgJwojJyAgLSBkZXRlY3RlZF90cmFuc2NyaXB0czogJ2BzbSBjb25maWdbIlBST0NfREFUQSJdICsgIi9pbnRlZ3JhdGlvbi9kZXRlY3RlZF90cmFuc2NyaXB0cy50c3YiYCcKIycgIC0gZGV0ZWN0ZWRfcHJvdGVpbnM6ICdgc20gY29uZmlnWyJQUk9DX0RBVEEiXSArICIvaW50ZWdyYXRpb24vZGV0ZWN0ZWRfcHJvdGVpbnMudHN2ImAnCiMnICAtIGRldGVjdGVkX3Byb3RlaW5zX2d0ZXg6ICdgc20gY29uZmlnWyJQUk9DX0RBVEEiXSArICIvaW50ZWdyYXRpb24vZGV0ZWN0ZWRfcHJvdGVpbnNfZ3RleC50c3YiYCcKIycgIC0gZGlzZWFzZV9nZW5lczogJ2BzbSBjb25maWdbIkRBVEFTRVRTIl0gKyAiL2Rpc2Vhc2VfZ2VuZXMudHN2ImAnCiMnICBvdXRwdXQ6CiMnIG91dHB1dDogCiMnICAgaHRtbF9kb2N1bWVudDoKIycgICAgY29kZV9mb2xkaW5nOiBoaWRlCiMnICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKIyctLS0KCiMgTG9hZCBjb25maWcKc291cmNlKCJzcmMvY29uZmlnLlIiKQoKCiMgUmVhZCBpbnRlZ3JhdGVkIG9taWNzIGZpbGUgCiMgcnAgPC0gcmVhZFJEUygiL3MvcHJvamVjdC9taXRvTXVsdGlPbWljcy9tdWx0aU9NSUNzX2ludGVncmF0aW9uL3Byb2Nlc3NlZF9kYXRhL2ludGVncmF0aW9uL3BhdGllbnRfb21pY3MuUkRTIikgJT4lIGFzLmRhdGEudGFibGUoKQpycCA8LSByZWFkUkRTKHNuYWtlbWFrZUBpbnB1dCRwYXRpZW50X29taWNzKSAlPiUgYXMuZGF0YS50YWJsZSgpCgpycCA8LSBycFsgZ2VuZV9kZXRlY3RlZCAhPSAibm8gUk5BIl0KcnAgPC0gcnBbICFpcy5uYShub3JtY291bnRzICldCgpnZ3Bsb3QocnAsIGFlcyhsb2cxMChub3JtY291bnRzKSwgZmlsbCA9IGdlbmVfZGV0ZWN0ZWQpKSsKICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjQpKwogIHRoZW1lX2J3KCkrCiAgZ2d0aXRsZSgiUk5BIGNvdW50cyBmb3Igbm9uLWRldGVjdGVkIHByb3RlaW5zIikKCgpybShycCkKCiMgR2V0IGFsbCBwcm90ZWluIGNvZGluZyBnZW5lcwojIGdlbmVjb2RlX3YyOSA8LSBmcmVhZCgnL3MvcHJvamVjdC9taXRvTXVsdGlPbWljcy9tdWx0aU9NSUNzX2ludGVncmF0aW9uL2RhdGFzZXRzL2dlbmVfYW5ub3RhdGlvbl92MjkudHN2JykKZ2VuZWNvZGVfdjI5IDwtIGZyZWFkKHNuYWtlbWFrZUBpbnB1dCRnZW5jb2RlX2Fubm90YXRpb24pCmdlbmVjb2RlX3YyOVssIGdlbmVJRCA6PSB0b3VwcGVyKGdlbmVfbmFtZSldCmdlbmVjb2RlX3YyOSA8LSBnZW5lY29kZV92MjlbICwgLihnZW5lSUQgLCBnZW5lX3R5cGUpXQpn
ZW5lY29kZV92MjkgPC0gZ2VuZWNvZGVfdjI5WyFkdXBsaWNhdGVkKGdlbmVjb2RlX3YyOSldCmdlbmVjb2RlX3YyOSA8LSBnZW5lY29kZV92Mjlbb3JkZXIoZ2VuZV90eXBlKV0KCmdlbmVjb2RlIDwtIGFnZ3JlZ2F0ZShnZW5lY29kZV92MjlbLCAtMV0sIGJ5PSBsaXN0KGdlbmVjb2RlX3YyOSRnZW5lSUQpLCBwYXN0ZSkKc2V0bmFtZXMoZ2VuZWNvZGUsIGMoImdlbmVJRCIgLCAiZ2VuY29kZV92MjkiKSApCmdlbmVjb2RlJGdlbmNvZGVfdjI5IDwtIGFzLmNoYXJhY3RlcihnZW5lY29kZSRnZW5jb2RlX3YyOSkKZ2VuZWNvZGUgPC0gZ2VuZWNvZGVbIWR1cGxpY2F0ZWQoZ2VuZWNvZGUpLCBdCnJtKGdlbmVjb2RlX3YyOSkKCiMgTG9hZCBkaXNlYXNlIGdlbmVzIHRhYmxlCiMgZGlzX2dlbmVzIDwtIGZyZWFkKCcvcy9wcm9qZWN0L21pdG9NdWx0aU9taWNzL211bHRpT01JQ3NfaW50ZWdyYXRpb24vZGF0YXNldHMvZGlzZWFzZV9nZW5lcy50c3YnKQpkaXNfZ2VuZXMgPC0gZnJlYWQoc25ha2VtYWtlQGlucHV0JGRpc2Vhc2VfZ2VuZXMpCmRpc19nZW5lcyA8LSBkaXNfZ2VuZXNbICwgLihnZW5lSUQgLCBESVNFQVNFKV0KZGlzX2dlbmVzIDwtIGRpc19nZW5lc1shZHVwbGljYXRlZChkaXNfZ2VuZXMpXQpkaXNfZ2VuZXMgPC0gZGlzX2dlbmVzW29yZGVyKERJU0VBU0UpXQoKZGcgPC0gYWdncmVnYXRlKGRpc19nZW5lc1ssIC0xXSwgYnk9IGxpc3QoZGlzX2dlbmVzJGdlbmVJRCksIHBhc3RlKQpzZXRuYW1lcyhkZywgYygiZ2VuZUlEIiAsICJkaXNlYXNlIikgKQpkZyRkaXNlYXNlIDwtIGFzLmNoYXJhY3RlcihkZyRkaXNlYXNlKQpkZyA8LSBkZ1shZHVwbGljYXRlZChkZyksIF0Kcm0oZGlzX2dlbmVzKQoKYWxsZ2VuZXMgPC0gbWVyZ2UoZ2VuZWNvZGUsIGRnLCBieSA9ICJnZW5lSUQiLCBhbGwueCA9IFQgICApCnJtKGdlbmVjb2RlLCBkZykKCgojIExvYWQgbGlzdCBvZiBnZW5lcywgZGV0ZWN0ZWQgYnkgUk5TLXNlcQojIGRldGVjdGVkX3RyYW5zY3JpcHRzIDwtIGZyZWFkKCcvcy9wcm9qZWN0L21pdG9NdWx0aU9taWNzL211bHRpT01JQ3NfaW50ZWdyYXRpb24vcHJvY2Vzc2VkX2RhdGEvaW50ZWdyYXRpb24vZGV0ZWN0ZWRfdHJhbnNjcmlwdHMudHN2JykKZGV0ZWN0ZWRfdHJhbnNjcmlwdHMgPC0gZnJlYWQoc25ha2VtYWtlQGlucHV0JGRldGVjdGVkX3RyYW5zY3JpcHRzKQpkZXRlY3RlZF90cmFuc2NyaXB0c1tvbmNlID09IFQgLCBmaWJfUk5BIDo9ICJvbmNlIl0KZGV0ZWN0ZWRfdHJhbnNjcmlwdHNbaGFsZiA9PSBUICwgZmliX1JOQSA6PSAiaGFsZiBvZiB0aGUgc2FtcGxlcyJdCmRldGVjdGVkX3RyYW5zY3JpcHRzW2FsbCA9PSBUICwgZmliX1JOQSA6PSAiYWxsIG9mIHRoZSBzYW1wbGVzIl0KZGV0ZWN0ZWRfdHJhbnNjcmlwdHMgPC0gZGV0ZWN0ZWRfdHJhbnNjcmlwdHNbICwgLihnZW5lSUQsIGZpYl9STkEpXQpkZXRlY3RlZF90cmFuc2NyaXB0cyA8LSBkZXRlY3RlZF90cmFuc2NyaXB0c1shZHVwbGljYXRlZChkZXRlY3RlZF90
cmFuc2NyaXB0cyldCgoKYWxsZ2VuZXMgPC0gYXMuZGF0YS50YWJsZSggbWVyZ2UoYWxsZ2VuZXMsIGRldGVjdGVkX3RyYW5zY3JpcHRzLCBieSA9ICJnZW5lSUQiLCBhbGwueCA9IFQpICkKYWxsZ2VuZXNbaXMubmEoZmliX1JOQSksIGZpYl9STkEgOj0gIm5vdCBkZXRlY3RlZCIgXQoKIyBMb2FkIGxpc3Qgb2YgZ2VuZXMsIGRldGVjdGVkIGJ5IHByb3Rlb21pY3MKIyBkZXRlY3RlZF9wcm90ZWlucyA8LSBmcmVhZCgnL3MvcHJvamVjdC9taXRvTXVsdGlPbWljcy9tdWx0aU9NSUNzX2ludGVncmF0aW9uL3Byb2Nlc3NlZF9kYXRhL2ludGVncmF0aW9uL2RldGVjdGVkX3Byb3RlaW5zLnRzdicpCmRldGVjdGVkX3Byb3RlaW5zIDwtIGZyZWFkKHNuYWtlbWFrZUBpbnB1dCRkZXRlY3RlZF9wcm90ZWlucykKZGV0ZWN0ZWRfcHJvdGVpbnNbb25jZSA9PSBUICwgZmliX3Byb3RlaW4gOj0gIm9uY2UiXQpkZXRlY3RlZF9wcm90ZWluc1toYWxmID09IFQgLCBmaWJfcHJvdGVpbiA6PSAiaGFsZiBvZiB0aGUgc2FtcGxlcyJdCmRldGVjdGVkX3Byb3RlaW5zW2FsbCA9PSBUICwgZmliX3Byb3RlaW4gOj0gImFsbCBvZiB0aGUgc2FtcGxlcyJdCmRldGVjdGVkX3Byb3RlaW5zIDwtIGRldGVjdGVkX3Byb3RlaW5zWyAsIC4oZ2VuZUlELCBmaWJfcHJvdGVpbildCmRldGVjdGVkX3Byb3RlaW5zIDwtIGRldGVjdGVkX3Byb3RlaW5zWyFkdXBsaWNhdGVkKGRldGVjdGVkX3Byb3RlaW5zKV0KYWxsZ2VuZXMgPC0gYXMuZGF0YS50YWJsZSggbWVyZ2UoYWxsZ2VuZXMsIGRldGVjdGVkX3Byb3RlaW5zLCBieSA9ICJnZW5lSUQiLCBhbGwueCA9IFQpICkKYWxsZ2VuZXNbaXMubmEoZmliX3Byb3RlaW4pLCBmaWJfcHJvdGVpbiA6PSAibm90IGRldGVjdGVkIiBdCgoKIyBMb2FkIGxpc3Qgb2YgZ2VuZXMsIGRldGVjdGVkIGJ5IEdURXggcHJvdGVvbWljcwojIGRldGVjdGVkX3Byb3RlaW5zX2d0ZXggPC0gZnJlYWQoJy9zL3Byb2plY3QvbWl0b011bHRpT21pY3MvbXVsdGlPTUlDc19pbnRlZ3JhdGlvbi9wcm9jZXNzZWRfZGF0YS9pbnRlZ3JhdGlvbi9kZXRlY3RlZF9wcm90ZWluc19ndGV4LnRzdicpCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXggPC0gZnJlYWQoc25ha2VtYWtlQGlucHV0JGRldGVjdGVkX3Byb3RlaW5zX2d0ZXgpCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbb25jZSA9PSBUICwgcHJvdGVpbiA6PSAib25jZSJdCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbaGFsZiA9PSBUICwgcHJvdGVpbiA6PSAiaGFsZiBvZiB0aGUgc2FtcGxlcyJdCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbYWxsID09IFQgLCBwcm90ZWluIDo9ICJhbGwgb2YgdGhlIHNhbXBsZXMiXQpkZXRlY3RlZF9wcm90ZWluc19ndGV4IDwtIGRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbICwgLihnZW5lSUQsIHByb3RlaW4sIFRJU1NVRSldCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXggPC0gZGV0ZWN0ZWRfcHJvdGVpbnNfZ3RleFshaXMubmEocHJvdGVpbildCmRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbICwg
Z3RleF9wcm90ZWluIDo9IHBhc3RlKHByb3RlaW4gLCAiaW4iLCAgVElTU1VFKV0KZGV0ZWN0ZWRfcHJvdGVpbnNfZ3RleCA8LSBkZXRlY3RlZF9wcm90ZWluc19ndGV4WyAsIC4oZ2VuZUlELCBndGV4X3Byb3RlaW4pXQpkZXRlY3RlZF9wcm90ZWluc19ndGV4IDwtIGRldGVjdGVkX3Byb3RlaW5zX2d0ZXhbIWR1cGxpY2F0ZWQoZGV0ZWN0ZWRfcHJvdGVpbnNfZ3RleCldCgpkcF9ndGV4IDwtIGFnZ3JlZ2F0ZShkZXRlY3RlZF9wcm90ZWluc19ndGV4WywgLTFdLCBieT0gbGlzdChkZXRlY3RlZF9wcm90ZWluc19ndGV4JGdlbmVJRCksIHBhc3RlKQpzZXRuYW1lcyhkcF9ndGV4LCBjKCJnZW5lSUQiICwgImd0ZXhfcHJvdGVpbiIpICkKZHBfZ3RleCRndGV4X3Byb3RlaW4gPC0gYXMuY2hhcmFjdGVyKGRwX2d0ZXgkZ3RleF9wcm90ZWluKQpkcF9ndGV4IDwtIGRwX2d0ZXhbIWR1cGxpY2F0ZWQoZHBfZ3RleCksIF0Kcm0oZGV0ZWN0ZWRfcHJvdGVpbnNfZ3RleCkKCgphbGxnZW5lcyA8LSBhcy5kYXRhLnRhYmxlKCBtZXJnZShhbGxnZW5lcywgZHBfZ3RleCwgYnkgPSAiZ2VuZUlEIiwgYWxsLnggPSBUKSApCmFsbGdlbmVzW2lzLm5hKGd0ZXhfcHJvdGVpbiksIGd0ZXhfcHJvdGVpbiA6PSAibm90IGRldGVjdGVkIiBdCgoKYWxsZ2VuZXMgPC0gYWxsZ2VuZXNbICEoZmliX1JOQSA9PSAibm90IGRldGVjdGVkIiAmICBmaWJfcHJvdGVpbiA9PSAibm90IGRldGVjdGVkIiAmIGd0ZXhfcHJvdGVpbiA9PSAibm90IGRldGVjdGVkIiApXQoKYWxsZ2VuZXMgPC0gYWxsZ2VuZXNbb3JkZXIoZGlzZWFzZSldCgojK2VjaG89RgpEVDo6ZGF0YXRhYmxlKGFsbGdlbmVzLCBjYXB0aW9uID0gIkdlbmUgY292ZXJhZ2UgYnkgb21pY3MiLCAKICAgICAgICAgICAgICBzdHlsZSA9ICdib290c3RyYXAnLCBmaWx0ZXIgPSAndG9wJywgZXNjYXBlID0gRiwKICAgICAgICAgICAgICBleHRlbnNpb25zID0gYyggJ0J1dHRvbnMnLCAnQ29sUmVvcmRlcicgKSwgCiAgICAgICAgICAgICAgb3B0aW9ucyA9IGxpc3QoIGNvbFJlb3JkZXIgPSBUUlVFLCBkb20gPSAnQmZydGlwJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYnV0dG9ucyA9IGMoJ2NvcHknLCAnY3N2JywgJ2V4Y2VsJywgJ3BkZicsICdwcmludCcpKSkKCgoKCgoKCgoKCg==